Locality of sampling
Let’s look at the patterns in search behavior. First, let’s look at the distance between successive choices.
#compare to random
sampleSize <- 400000
randomDistanceDF <- data.frame(x=sample(x = seq(0:7), size = sampleSize, replace=TRUE), y=sample(x = seq(0:7), size = sampleSize, replace=TRUE), environment=c(rep("Rough",sampleSize/2), rep("Smooth", sampleSize/2)), context = rep(c('Conceptual', 'Spatial'), sampleSize/2))
randomDistanceDF <- randomDistanceDF %>%
mutate(distance = abs((x - lag(x,default = NA)) + abs(y - lag(y,default = NA)) ))
#Add classification of choices as stay, near, or far decisions
localityDF <-ddply(df, ~id+trial+context, plyr::summarize, avgDistance=mean(distance, na.rm=T))
localityDF$choiceType <-ifelse(localityDF$avgDistance==0, "Stay", ifelse(localityDF$avgDistance==1, "Near", "Far"))
localityDF$choiceType <- factor(localityDF$choiceType)
choiceProp <- ddply(na.omit(localityDF),.(id,context), function(x) with(x,data.frame(table(choiceType)/length(choiceType),2)))
choiceProp$choiceType <- factor(choiceProp$choiceType, levels=c("Stay", "Near", "Far"))
Let’s first do an ANOVA here
#Two way mixed ANOVA: context is within, environment is between
dd <-ddply(df, ~id+environment+context, plyr::summarize, avgDistance=mean(distance, na.rm=T))
dd$id <- factor(dd$id)
res.aov <- aov(avgDistance ~ environment*context + Error(id/context), data=dd)
anova_stats(res.aov)
#Now compute Bayes factor
bf = anovaBF(avgDistance ~ environment*context+id, data=dd, whichRandom="id")
bf
## Bayes factor analysis
## --------------
## [1] environment + id : 0.2457806 ±0.68%
## [2] context + id : 66.73385 ±0.94%
## [3] environment + context + id : 16.97827 ±2.84%
## [4] environment + context + environment:context + id : 5.690089 ±1.95%
##
## Against denominator:
## avgDistance ~ id
## ---
## Bayes factor type: BFlinearModel, JZS
Let’s plot the results
contextLabels <- c('Conceptual' = 'Conceptual\nTask', 'Spatial' = 'Spatial\nTask', "Rough"="Rough", "Smooth"="Smooth")
p4alt <- ggplot(na.omit(df), aes(x=distance, fill = context, color = context)) +
geom_histogram(aes(y = ..density..*20), position = 'dodge', binwidth=1, color='black')+
stat_density(data = randomDistanceDF, aes(y = ..density..*20), geom="line",color='black', size = .8, bw = 1) +
#geom_density(fill=NA, size = 0.7) +
scale_fill_manual(values=c("#1B9E77", "#D95F02", "Black"), name="") +
scale_color_manual(values = c("#1B9E77", "#D95F02", "Black"), name="") +
ylab("Choices Per Round") +
xlab("Distance Between Choices") +
#xlim(0,6)+
facet_grid(context~environment, labeller = as_labeller(contextLabels))+
scale_x_continuous(breaks = scales::pretty_breaks(n = 5))+
scale_y_continuous(breaks = seq(0, 10, by = 2))+
#ggtitle("Locality of Sampling") +
theme(legend.position='none', strip.background=element_blank(), legend.key=element_rect(color=NA))
p4alt
## Warning: Removed 1 rows containing non-finite values (stat_density).

Let’s try a different version where the differences between task are more salient
contextLabels <- c('Conceptual' = 'Conceptual\nTask', 'Spatial' = 'Spatial\nTask', "Rough"="Rough", "Smooth"="Smooth")
anndf<-data.frame(distance = NA,context = NA,environment = factor("Smooth", levels = c("Rough", "Smooth")), text = 'Random', color = 'black') #for annotation
p4alt <- ggplot(na.omit(df), aes(x=distance, fill = context, color = context)) +
geom_histogram(aes(y = ..density..*20), position = 'identity', binwidth=1, alpha = 0.4)+
stat_density(data = subset(randomDistanceDF, context == 'Conceptual'), aes(y = ..density..*20), geom="line",color='black', size = .8, bw = 1) +
#geom_density(fill=NA, size = 0.7) +
scale_fill_manual(values=c("#1B9E77", "#D95F02", "Black"), name="Task") +
scale_color_manual(values = c("#1B9E77", "#D95F02", "Black"), name="Task") +
ylab("Choices Per Round") +
xlab("Distance Between Choices") +
#xlim(0,6)+
facet_grid(~environment, labeller = as_labeller(contextLabels))+
scale_x_continuous(breaks = scales::pretty_breaks(n = 5))+
scale_y_continuous(breaks = seq(0, 10, by = 2))+
#ggtitle("Locality of Sampling") +
#geom_text(data = anndf, x = 10.5, y = 5, label = "Random", color = 'black', size = 3.5)+
#geom_segment(data = anndf, x = 9, xend = 10, y = 5, yend = 5,colour = "black", size = 1.2)+
theme(legend.position=c(1,1), legend.justification = c(1,1), strip.background=element_blank(), legend.key=element_rect(color=NA))
p4alt
## Warning: Removed 1 rows containing non-finite values (stat_density).

Participants searched over larger distances in the conceptual task than the spatial task (\(t(128)=-3.7\), \(p<.001\), \(d=0.3\), \(BF=59\)). There were no differences across environments (\(t(127)=-0.3\), \(p=.727\), \(d=0.06\), \(BF=.20\)). Note that each trial began on a random selected stimuli. So searching close to the previous selection is not due to a lack of effort. (\(t(128)=-16.2\), \(p<.001\), \(d=1.4\), \(BF>100\))
#Statistical tests reported above
localityDF <- ddply(df, ~id+context,plyr::summarize, avgDistance=mean(distance, na.rm=T))
ttestPretty(subset(localityDF, context == 'Spatial')$avgDistance, subset(localityDF, context == 'Conceptual')$avgDistance, var.equal=T, paired=T)
## [1] "$t(128)=-3.7$, $p<.001$, $d=0.3$, $BF=59$"
localityDF <- ddply(df, ~id+environment,plyr::summarize, avgDistance=mean(distance, na.rm=T))
ttestPretty(subset(localityDF, environment == 'Smooth')$avgDistance, subset(localityDF, environment == 'Rough')$avgDistance, var.equal=T)
## [1] "$t(127)=-0.3$, $p=.727$, $d=0.06$, $BF=.20$"
localityDF <- ddply( df, ~id, plyr::summarize, avgDistance=mean(distance, na.rm=T))
ttestPretty(na.omit(localityDF$avgDistance), mu = mean(randomDistanceDF$distance, na.rm=T))
## [1] "$t(128)=-16.3$, $p<.001$, $d=1.4$, $BF>100$"
Now let’s classify these choices as either Stay (distance = 0), Near (distance = 1), or Far (distance >1).
#choice prop
localityDF <-ddply(df, ~id+trial+context, plyr::summarize, avgDistance=mean(distance, na.rm=T))
localityDF$distance <-localityDF$avgDistance
randomDistanceDF$id <- 0
randomDistanceDF$context <- 'Random'
localityDF <- rbind(localityDF[,c( "context", "distance", 'id')], randomDistanceDF[,c( "context", "distance", 'id')])
localityDF$choiceType <-ifelse(localityDF$distance==0, "Stay", ifelse(localityDF$distance==1, "Near", "Far"))
localityDF$choiceType <- factor(localityDF$choiceType)
choiceProp <- ddply(na.omit(localityDF),.(id,context),
function(x) with(x,
data.frame(table(choiceType)/length(choiceType),2)))
choiceProp$choiceType <- factor(choiceProp$choiceType, levels=c("Stay", "Near", "Far"))
p4 <- ggplot(na.omit(choiceProp), aes(x=choiceType, y = Freq*20, fill=context, color = context))+
stat_summary(fun.y = mean,geom='bar', position='dodge', color='black') +
stat_summary(fun.data = mean_se, geom = "errorbar", position = position_dodge(width = 0.90), width = 0.2, color='black' ) +
#scale_y_continuous(labels=percent)+
scale_fill_manual(values=c("#1B9E77", "#D95F02", "Black"), name="") +
scale_color_manual(values=c("#1B9E77", "#D95F02", "Black"), name="") +
#scale_fill_rickandmorty()+
ylab('Choices Per Round ±SE')+
xlab("Choice Type")+
#facet_grid(~contextOrder)+
theme(legend.position= c(0.05, 1), legend.justification=c(0,1), strip.background=element_blank(), legend.key=element_rect(color=NA))
p4
## Warning: Removed 3 rows containing missing values (geom_errorbar).

This seems to paint the same picture as the distance histograms before. Participants made more stay choices in the spatial task (\(t(128)=-2.7\), \(p=.007\), \(d=0.3\), \(BF=3.4\)) and more far choices in the conceptual task (\(t(128)=2.8\), \(p=.006\), \(d=0.3\), \(BF=4.1\)). There were no differences in near choices (\(t(128)=-0.4\), \(p=.688\), \(d=0.05\), \(BF=.11\)).
Search Trajectories
We have this really rich data about how participants navigated the search space. Let’s first look at the number of steps participants took before making a selection
df$steps <- sapply(df$trajectories, function(i) length(fromJSON(as.character(i))))
trajDF <- df%>% group_by(id,context) %>% dplyr::summarize(avgSteps=mean(steps, na.rm=T))
trajContextDF <- df%>% group_by(id,context) %>% dplyr::summarize(avgSteps=mean(steps, na.rm=T))
traEnvjDF <- df%>% group_by(id,environment) %>% dplyr::summarize(avgSteps=mean(steps, na.rm=T))
levels(df$contextOrder)<- c("Spatial First", "Conceptual First")
#comparing context
ggplot(df, aes(x = context, y = steps, fill = context))+
stat_summary(fun.y = mean, geom = "bar", position = "dodge", color='black') +
stat_summary(fun.data = mean_se, geom = "errorbar", position = position_dodge(width = 0.90), width = 0.2, color = 'black' ) +
theme(legend.position='right', strip.background=element_blank(), legend.key=element_rect(color=NA), legend.background=element_blank(), text = element_text(size=16, family="sans"))+
#coord_cartesian(ylim=c(0,8)) +
xlab("")+
scale_fill_brewer(palette = "Dark2", name="") +
facet_grid(~contextOrder)+
ylab("Mean Number of Steps \u00B1SE")

medians <- df%>% group_by(environment,context) %>% dplyr::summarize(steps=mean(steps, na.rm=T))
contextLabels <- c('Conceptual' = 'Conceptual\nTask', 'Spatial' = 'Spatial\nTask', "Rough"="Rough", "Smooth"="Smooth")
trajectoryplot <- ggplot(na.omit(df), aes(x=steps, fill = context, color = context)) +
geom_histogram(aes(y = ..density..*20), position = 'dodge', binwidth=1, color='black')+
#stat_density(data = as.data.frame(randomDF), aes(value),geom="line",color='black', size = .8, linetype='dashed') +
#geom_density(fill=NA, size = 0.7) +
scale_fill_manual(values=c("#1B9E77", "#D95F02", "Black"), name="") +
scale_color_manual(values = c("#1B9E77", "#D95F02", "Black"), name="") +
geom_vline(data = medians, aes(xintercept = steps), linetype = 'dashed', size =.7)+
ylab("Choices Per Round") +
xlab("Trajectory Length") +
#xlim(0,6)+
facet_grid(context~environment, labeller = as_labeller(contextLabels))+
scale_x_continuous(breaks = scales::pretty_breaks(n = 5), limits = c(0,20))+
scale_y_continuous(breaks = seq(0, 3, by = 1))+
#ggtitle("Locality of Sampling") +
theme(legend.position='none', strip.background=element_blank(), legend.key=element_rect(color=NA))
trajectoryplot
## Warning: Removed 843 rows containing non-finite values (stat_bin).
## Warning: Removed 8 rows containing missing values (geom_bar).
Participants had longer trajectories in the contextual task (\(t(128)=-10.7\), \(p<.001\), \(d=1.0\), \(BF>100\)), although there were no differences across environments (\(t(127)=1.3\), \(p=.213\), \(d=0.2\), \(BF=.38\)).
ttestPretty(subset(trajContextDF, context == 'Spatial')$avgSteps, subset(trajContextDF, context == 'Conceptual')$avgSteps, var.equal=T, paired=T)
## [1] "$t(128)=-10.7$, $p<.001$, $d=1.0$, $BF>100$"
ttestPretty(subset(traEnvjDF, environment == 'Smooth')$avgSteps, subset(traEnvjDF, environment == 'Rough')$avgSteps, var.equal=T)
## [1] "$t(127)=1.3$, $p=.213$, $d=0.2$, $BF=.38$"
We can also compute the efficiency of their trajectories based on \(\text{efficiency} = \frac{\text{Manhattan Distance from start to selection}}{\text{Steps taken}}\)
efficencyDF <- ddply(df, ~id+context, plyr::summarize, efficiency = mean(movement/steps))
efficiencyPlot <- ggplot(efficencyDF, aes(x = context, y = efficiency, color = context, fill = context))+
geom_boxplot(fill=NA, color = 'black', width = .2, outlier.shape = NA)+
geom_quasirandom(alpha = .7)+
stat_summary(fun.y = mean, geom = "point",color = 'black', fill=NA, shape =23, size = 3 ) +
#coord_cartesian(ylim=c(0,2)) +
xlab('')+
scale_fill_brewer(palette = "Dark2", name="") +
scale_color_brewer(palette = "Dark2", name="") +
theme( legend.position='none', strip.background=element_blank(), legend.background=element_blank(), legend.key=element_rect(color=NA))+
ylab("Efficiency \u00B1SE")
efficiencyPlot
Participants are clearly less efficient in the conceptual task than the spatial task ( ttestPretty(subset(efficencyDF, context==‘Conceptual’)\(efficiency, subset(efficencyDF, context=='Spatial')\)efficiency, paired=T)).
Now let’s ask, what factors influence trajectories? Do longer trajectories obtain higher rewards? Yes they do (\(r=.21\), \(p<.001\), \(BF>100\)).
#steps as a function of previous reward
pTrajLengthReward <- ggplot(subset(df,steps<=20), aes(x =steps, y = z, color = context, fill = context))+
#geom_smooth(fill=NA)+
stat_summary(fun.y = mean, geom = "point") +
stat_summary(fun.data = mean_cl_boot, geom = "errorbar") +
coord_cartesian(xlim=c(0,20), ylim=c(0,100)) + #Tukey outlier criterion indicates outliers above 20; min(boxplot.stats(df$steps)$out)
#facet_grid(~environment)+
scale_fill_brewer(palette = "Dark2", name="Task") +
scale_color_brewer(palette = "Dark2", name="Task") +
theme(legend.position=c(1,0.1),legend.justification = c(1,0), strip.background=element_blank(), legend.background=element_blank(), legend.key=element_rect(color=NA))+
ylab("Reward Value ± 95% CI")+
xlab('Trajectory Length')
pTrajLengthReward

We can also look at the entropy of each trajectory (computed over the distribution of directions moved). It seems like participants in the contextual task had higher entropy (consistent with larger step sizes and lower efficiency), and that lower entropy predicts higher reward.
library(entropy)
myent<-function(x){
return(entropy.empirical(table(x)))
}
df$trajEntropy <- sapply(df$trajectories, function(i) myent(fromJSON(as.character(i))))
ggplot(df, aes(x = context, y = trajEntropy, fill = context))+
stat_summary(fun.y = mean,geom='bar', position='dodge', color='black') +
stat_summary(fun.data = mean_se, geom = "errorbar", position = position_dodge(width = 0.90), width = 0.2, color='black' ) +
#scale_y_continuous(labels=percent)+
scale_fill_manual(values=c("#1B9E77", "#D95F02"), name="") +
scale_color_manual(values=c("#1B9E77", "#D95F02"), name="") +
facet_grid(~environment)

ggplot(subset(df, trajEntropy>0), aes(x = trajEntropy, y = z, color = context))+
geom_point(alpha = 0.05)+
geom_smooth(method = 'lm')+
#stat_summary(fun.y = mean, geom = "point") +
#stat_summary(fun.data = mean_cl_boot, geom = "errorbar") +
xlab('Entropy')+ ylab('Reward Value')+
scale_fill_manual(values=c("#1B9E77", "#D95F02"), name="") +
scale_color_manual(values=c("#1B9E77", "#D95F02"), name="") +
facet_grid(~environment)

How were both distance and trajectory length influenced by the previous reward value?
#reward and distance
#corTestPretty(na.omit(df)$distance, na.omit(df)$previousReward)
p5 <- ggplot(na.omit(df), aes(x=distance, y = previousReward, color = context, fill=context)) +
#geom_count(alpha=0.2, show.legend = F, position = position_dodge(width=0.1))+
#scale_size_area(max_size = 5)+
#geom_jitter(alpha=0.05, size=0.5)+
#geom_smooth(method = "lm") +
stat_summary(fun.y = mean, geom = 'line', size=1)+
stat_summary(fun.data = mean_se, geom = 'ribbon', alpha = 0.7, color=NA) +
theme_classic() +
labs(y='Previous Reward Value', x = 'Distance Between Selections')+
scale_x_continuous(breaks = scales::pretty_breaks(n = 5))+
scale_color_brewer(palette = 'Dark2', name="Task")+
scale_fill_brewer( palette = 'Dark2', name="Task")+
#coord_flip()+
theme(legend.position=c(1,1), legend.justification = c(1,1), strip.background=element_blank(), legend.key=element_rect(color=NA), legend.background=element_blank())
p5
It seems like participants move further away from their previous selection when the reward value was low (\(r=-.66\), \(p<.001\), \(BF>100\)), suggesting basic evidence of generalization behavior.
Let’s run a mixed model on these results
#Mixed effects modeling
distanceRewardMM <- run_model(brm(distance ~ previousReward+context+previousReward*context +(1+previousReward|id), data=subset(df, !is.na(df$distance)), cores=4, iter = 4000, warmup = 1000, control = list(adapt_delta = 0.99)), modelName = 'distanceRewardMM')
#bayes_R2(distanceRewardMM)
#tab_model(distanceRewardMM) #Really slow!
fixedTerms <- fixef(distanceRewardMM)#Look at fixed terms
#Now generate predictions, removing id as a random effect
xseq <- seq(0,100)
newdat <-data.frame(context = rep(c("Conceptual","Spatial"), each=101), previousReward = rep(xseq,2))
preds <- fitted(distanceRewardMM, re_formula = NA, newdata = newdat, probs = c(0.025, 0.975))
#create new fixed effects dataframe
fixedDF <- data.frame(context = rep(c("Conceptual","Spatial"), each=101), previousReward = rep(xseq,2),
distance = preds[,1], lower = preds[,3], upper = preds[,4] )
p5alt <- ggplot(subset(df, !is.na(df$distance)), aes(previousReward, distance, color = context, fill = context)) +
#geom_hline(yintercept = mean(randomDistanceDF$distance, na.rm=T ), size = 1, color = 'black', linetype='dashed')+
geom_line(data = fixedDF, size = 1)+ #GP is
geom_ribbon(data = fixedDF, aes(ymin=lower, ymax = upper), color = NA, alpha = 0.4 )+
stat_summary(fun.y=mean,geom='point', alpha = 0.8)+
#geom_abline(slope = 1, linetype = 'dashed')+
#coord_cartesian(xlim = c(0,100))+
xlim(c(0,100))+
theme_classic()+
scale_color_brewer(palette = 'Dark2', name="Task")+
scale_fill_brewer( palette = 'Dark2', name="Task")+
#facet_grid(~context, labeller = as_labeller(contextLabels) )+
xlab("Previous Reward Value")+
ylab("Distance Between Selections")+
annotate("text", x = 50, y = 8, label = "paste(italic(b)[prevReward] , \" = -0.06, 95% HPD: [-0.07, -0.06]\")", parse = TRUE)+
theme(legend.position=c(0, 0), legend.justification=c(0,0), strip.background=element_blank(), legend.key=element_blank(), legend.background=element_blank())
p5alt
## Warning: Removed 4966 rows containing non-finite values (stat_summary).

distanceInitialMM <- run_model(brm(movement ~ previousReward+context+previousReward*context +(1+previousReward|id), data=subset(df, !is.na(df$movement)), cores=4, iter = 4000, warmup = 1000, control = list(adapt_delta = 0.99)), modelName = 'distanceInitialMM')
#bayes_R2(distanceInitialMM)
#tab_model(distanceInitialMM)
fixedTerms <- fixef(distanceInitialMM)#Look at fixed terms
#Now generate predictions, removing id as a random effect
xseq <- seq(1,100)
newdat <-data.frame(context = rep(c("Conceptual","Spatial"), each=100), previousReward = rep(xseq,2))
preds <- fitted(distanceInitialMM, re_formula = NA, newdata = newdat, probs = c(0.025, 0.975))
#create new fixed effects dataframe
fixedDF <- data.frame(context = rep(c("Conceptual","Spatial"), each=100), previousReward = rep(xseq,2),
movement = preds[,1], lower = preds[,3], upper = preds[,4] )
p6 <- ggplot(subset(df, !is.na(df$movement)), aes(previousReward, movement, color = context, fill = context)) +
geom_hline(yintercept = mean(randomDistanceDF$distance, na.rm=T ), size = 1, color = 'black', linetype='dashed')+
geom_line(data = fixedDF, size = 1)+ #GP is
geom_ribbon(data = fixedDF, aes(ymin=lower, ymax = upper), color = NA, alpha = 0.4 )+
stat_summary(fun.y=mean,geom='point', alpha = 0.8)+
#geom_abline(slope = 1, linetype = 'dashed')+
#coord_cartesian(xlim = c(0,100))+
xlim(c(0,100))+
theme_classic()+
scale_color_brewer(palette = 'Dark2', name="Task")+
scale_fill_brewer( palette = 'Dark2', name="Task")+
#facet_grid(~context, labeller = as_labeller(contextLabels) )+
xlab("Previous Reward Value")+
ylab("Distance From Initial Position")+
annotate("text", x = 50, y = 8, label = "paste(italic(b)[prevReward] , \" = 0.02, 95% HPD: [0.01, 0.02]\")", parse = TRUE)+
theme(legend.position=c(0, 0.7), legend.justification=c(0,1), strip.background=element_blank(), legend.key=element_blank(), legend.background=element_blank())
p6
## Warning: Removed 7288 rows containing non-finite values (stat_summary).
At ther same time, participants also moved futher away from their initial starting point after observing larger reward values (\(r=.23\), \(p<.001\), \(BF>100\)). Note that the there was a random starting position at the beginning of each trial. So the starting point is not the same as the previous selection. A small distance from the initial starting point is indicative of random search behavior, utilizing the randomness of the initialization. The trend indicates that participants made a larger effort to search in a directed fashion after observing large reward values